home *** CD-ROM | disk | FTP | other *** search
/ Info-Mac 4 / Info_Mac IV CD-ROM (Pacific HiTech Inc.)(August 1994).iso / Development / Source / DBL Pascal Library / Levenshtein ƒ / Levenshtein.p < prev    next >
Text File  |  1992-05-06  |  6KB  |  242 lines

  1. unit Levenshtein;
  2.  
  3. {Compute the Levenshtein distance between a pair of strings.}
  4. {Adapted from C code presented in "Finding String Distances",}
  5. {Ray Valdés, Dr. Dobb’s Journal, April 1992, ppg. 56— 62, 107.}
  6. {Algorithm due to V.I. Levenshtein, as presented in "Time Warps,}
  7. {String Edits, and MacroMolecules: The Theory and Practice of}
  8. {Sequence Comparison", Sankoff and Kruskal, eds., Addison–Wesley,}
  9. {1983. Macintosh implementation for THINK Pascal by D.B.Lamkins.}
  10.  
  11. interface
  12.  
  13.     type
  14.         Opcode = (match, insert, delete, substitute);
  15.         LevOp = record
  16.                 iA, iB: Integer;
  17.                 op: Opcode;
  18.             end;
  19.         LevOps = array[1..255] of LevOp;
  20.         LevOpsPtr = ^LevOps;
  21.         LevOpsHdl = ^LevOpsPtr;
  22.  
  23. {Call InitLevDist to establish the costs of the four edit operations.}
  24.     procedure InitLevDist (matchCost, insertCost, deleteCost, substituteCost: Integer);
  25.  
  26. {LevDist returns the Levenshtein distance between the given strings. When non–nil,}
  27. {theOps is resized and filled in with the edit sequence (as defined in the article) and}
  28. {moves is the number of edits.}
  29.     function LevDist (a, b: Str255; theOps: LevOpsHdl; var moves: Integer): Integer;
  30.  
  31. implementation
  32.  
  33.     type
  34.         MatrixCell = record
  35.                 distance: Integer;
  36.                 op: Opcode;
  37.             end;
  38.         MatrixCellPtr = ^MatrixCell;
  39.         MatrixCellHdl = ^MatrixCellPtr;
  40.         Move = record
  41.                 dRow, dCol: Integer;
  42.             end;
  43.  
  44.     var
  45.         theMatrix: MatrixCellHdl;
  46.         theCost: array[Opcode] of Integer;
  47.         theMoves: array[Opcode] of Move;
  48.  
  49.     procedure InitLevDist (matchCost, insertCost, deleteCost, substituteCost: Integer);
  50.     begin
  51.         theMatrix := nil;
  52.         theCost[match] := matchCost;
  53.         theCost[insert] := insertCost;
  54.         theCost[delete] := deleteCost;
  55.         theCost[substitute] := substituteCost;
  56.         with theMoves[match] do
  57.             begin
  58.                 dRow := -1;
  59.                 dCol := -1;
  60.             end;
  61.         with theMoves[insert] do
  62.             begin
  63.                 dRow := 0;
  64.                 dCol := -1;
  65.             end;
  66.         with theMoves[delete] do
  67.             begin
  68.                 dRow := -1;
  69.                 dCol := 0;
  70.             end;
  71.         with theMoves[substitute] do
  72.             begin
  73.                 dRow := -1;
  74.                 dCol := -1;
  75.             end;
  76.     end;
  77.  
  78.     function LevDist (a, b: Str255; theOps: LevOpsHdl; var moves: Integer): Integer;
  79.         var
  80.             numRows, numCols: Integer;
  81.  
  82.         procedure InitializeMatrix;
  83.             var
  84.                 i: Integer;
  85.                 p: MatrixCellPtr;
  86.         begin
  87.             with theMatrix^^ do
  88.                 begin
  89.                     distance := 0;
  90.                     op := delete;
  91.                 end;
  92.             p := theMatrix^;
  93.             for i := 1 to numCols - 1 do
  94.                 begin
  95.                     p := MatrixCellPtr(ORD(p) + SIZEOF(MatrixCell));
  96.                     with p^ do
  97.                         begin
  98.                             distance := i;
  99.                             op := insert;
  100.                         end;
  101.                 end;
  102.             p := theMatrix^;
  103.             for i := 1 to numRows - 1 do
  104.                 begin
  105.                     p := MatrixCellPtr(ORD(p) + SIZEOF(MatrixCell) * numCols);
  106.                     with p^ do
  107.                         begin
  108.                             distance := i;
  109.                             op := delete;
  110.                         end;
  111.                 end;
  112.         end;
  113.  
  114.         procedure CalculateMatrix;
  115.             var
  116.                 pC, pN, pW, pNW: MatrixCellPtr;
  117.  
  118.             procedure AdvancePtrs;
  119.             begin
  120.                 pC := MatrixCellPtr(ORD(pC) + SIZEOF(MatrixCell));
  121.                 pN := MatrixCellPtr(ORD(pN) + SIZEOF(MatrixCell));
  122.                 pW := MatrixCellPtr(ORD(pW) + SIZEOF(MatrixCell));
  123.                 pNW := MatrixCellPtr(ORD(pNW) + SIZEOF(MatrixCell));
  124.             end;
  125.  
  126.             var
  127.                 row, col: Integer;
  128.  
  129.             procedure CalculateCell;
  130.             begin
  131.                 if pW^.distance < pN^.distance then
  132.                     if pW^.distance < pNW^.distance then
  133.                         begin
  134.                             pC^.op := insert;
  135.                             pC^.distance := pW^.distance + theCost[insert];
  136.                         end
  137.                     else if a[row] = b[col] then
  138.                         begin
  139.                             pC^.op := match;
  140.                             pC^.distance := pNW^.distance + theCost[match];
  141.                         end
  142.                     else
  143.                         begin
  144.                             pC^.op := substitute;
  145.                             pC^.distance := pNW^.distance + theCost[substitute];
  146.                         end
  147.                 else if pN^.distance < pNW^.distance then
  148.                     begin
  149.                         pC^.op := delete;
  150.                         pC^.distance := pN^.distance + theCost[delete];
  151.                     end
  152.                 else if a[row] = b[col] then
  153.                     begin
  154.                         pC^.op := match;
  155.                         pC^.distance := pNW^.distance + theCost[match];
  156.                     end
  157.                 else
  158.                     begin
  159.                         pC^.op := substitute;
  160.                         pC^.distance := pNW^.distance + theCost[substitute];
  161.                     end;
  162.             end;
  163.  
  164.         begin
  165.             pC := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (numCols + 1));
  166.             pN := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (0 + 1));
  167.             pW := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (numCols + 0));
  168.             pNW := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (0 + 0));
  169.             for row := 1 to numRows - 1 do
  170.                 begin
  171.                     for col := 1 to numCols - 1 do
  172.                         begin
  173.                             CalculateCell;
  174.                             AdvancePtrs;
  175.                         end;
  176.                     AdvancePtrs;
  177.                 end;
  178.         end;
  179.  
  180.         procedure BacktrackMatrix;
  181.             var
  182.                 pC: MatrixCellPtr;
  183.                 theDistance, index, row, col, deltaRow, deltaCol: Integer;
  184.                 whichOp: Opcode;
  185.         begin
  186.             pC := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (numRows * numCols - 1));
  187.             theDistance := pC^.distance;
  188.             if theOps <> nil then
  189.                 begin
  190.                     SetHandleSize(Handle(theOps), (numRows + numCols) * SIZEOF(LevOp));
  191.                     index := 0;
  192.                     row := numRows - 1;
  193.                     col := numCols - 1;
  194.                     while (row > 0) | (col > 0) do
  195.                         begin
  196.                             whichOp := pC^.op;
  197.                             if whichOp <> match then
  198.                                 begin
  199.                                     index := index + 1;
  200.                                     with theOps^^[index] do
  201.                                         begin
  202.                                             iA := row;
  203.                                             iB := col;
  204.                                             op := whichOp;
  205.                                         end;
  206.                                 end;
  207.                             with theMoves[whichOp] do
  208.                                 begin
  209.                                     deltaRow := dRow;
  210.                                     deltaCol := dCol;
  211.                                 end;
  212.                             pC := MatrixCellPtr(ORD(pC) + (deltaRow * numCols + deltaCol) * SIZEOF(MatrixCell));
  213.                             row := row + deltaRow;
  214.                             col := col + deltaCol;
  215.                         end;
  216.                 end;
  217.             SetHandleSize(Handle(theOps), index * SIZEOF(LevOp));
  218.             moves := index;
  219.             LevDist := theDistance;
  220.         end;
  221.  
  222.         var
  223.             sizeNeeded: Size;
  224.  
  225.     begin {LevDist}
  226.         numRows := length(a) + 1;
  227.         numCols := length(b) + 1;
  228.         if (theMatrix = nil) | (theMatrix^ = nil) then
  229.             theMatrix := MatrixCellHdl(NewHandle(0));
  230.         HNoPurge(Handle(theMatrix));
  231.         sizeNeeded := Size(SIZEOF(MatrixCell)) * numRows * numCols;
  232.         if sizeNeeded > GetHandleSize(Handle(theMatrix)) then
  233.             SetHandleSize(Handle(theMatrix), SIZEOF(MatrixCell) * numRows * numCols);
  234.         HLock(Handle(theMatrix));
  235.         InitializeMatrix;
  236.         CalculateMatrix;
  237.         BacktrackMatrix;
  238.         HUnlock(Handle(theMatrix));
  239.         HPurge(Handle(theMatrix));
  240.     end;
  241.  
  242. end.